# coding=utf8
import time
from urllib.parse import urlparse

from bson import ObjectId
from selenium import webdriver

from libs.help import get_num_str
from models.drugs import model_drug_item, model_drug_vendor


def driver_new_session(proxy=False, show_image=True):
    """Open a new Chrome session on the chromedriver at 127.0.0.1:9515.

    Args:
        proxy: When true, Chrome is started with the SOCKS5 proxy
            ``192.168.1.153:9050`` as its proxy server.
        show_image: When false, Chrome is configured not to load images.
            The default (true) leaves image loading untouched.

    Returns:
        The ``webdriver.Remote`` instance bound to the new session.
    """
    options = webdriver.ChromeOptions()

    if proxy:
        options.add_argument('--proxy-server=socks5://192.168.1.153:9050')

    if not show_image:
        # Chrome content-setting value 2 means "block" for this content type.
        options.add_experimental_option(
            'prefs',
            {'profile.managed_default_content_settings.images': 2},
        )

    return webdriver.Remote(
        command_executor='http://127.0.0.1:9515',
        desired_capabilities=options.to_capabilities()
    )


def login(driver):
    """Fill in and submit the site's login form, then return the driver.

    The function opens the category page, types the account name and
    password into the ``#login`` and ``#password`` fields, waits for the
    operator to press Enter on the console, and finally clicks the form's
    submit button.

    Args:
        driver: The WebDriver session to log in with.

    Returns:
        The same ``driver`` object that was passed in.
    """
    # NOTE(review): the account credentials are hard-coded in this function.
    driver.get('http://lchudifyeqm4ldjj.onion/?category=104')

    username_box = driver.find_element_by_css_selector('#login')
    username_box.send_keys('sirewaggish')
    password_box = driver.find_element_by_css_selector('#password')
    password_box.send_keys('123456a')

    # The prompt asks for the captcha, but whatever is typed on the console
    # is discarded; presumably the captcha is entered in the browser window
    # and Enter here only resumes the script -- TODO confirm.
    input("请输入验证码")

    submit_selector = (
        'body > div.main > div.login.notloggedin > form > div '
        '> div.actionContainer > input[type="submit"]'
    )
    driver.find_element_by_css_selector(submit_selector).click()
    return driver


def get_list(driver, page):
    """Load one listing page of category 104 and return its item links.

    Args:
        driver: The WebDriver session used to load the page.
        page: Page number placed into the ``page`` query parameter.

    Returns:
        A list with the ``href`` attribute of the title link found in each
        ``div.shop div.around`` element, in page order.
    """
    page_no = str(page)
    driver.get(
        'http://lchudifyeqm4ldjj.onion/?page={}&category=104'.format(page_no)
    )
    cards = driver.find_elements_by_css_selector('div.shop div.around')
    return [
        card.find_element_by_css_selector('div.text.oTitle > a').get_attribute('href')
        for card in cards
    ]


def save_item(driver, pid, href):
    """Scrape one item page and store the item (and its vendor if new).

    The item id is taken from the URL's query string via ``get_num_str``.
    If an item with that ``GoodId`` is already stored, nothing is fetched
    and the function returns early.

    Args:
        driver: The WebDriver session used to load the item page.
        pid: Value stored under the ``Pid`` key of the item document.
        href: URL of the item page.

    Returns:
        ``False`` when the item already exists, ``True`` after the item has
        been inserted.
    """
    print(href)
    # Parse the URL: the id comes from its query string.
    item_id = get_num_str(urlparse(href).query)
    if model_drug_item.find_one({'GoodId': item_id}):
        print('数据存在,直接返回')
        return False

    driver.get(href)

    def inner_html(selector):
        # innerHTML of the first element matching ``selector``.
        return driver.find_element_by_css_selector(selector).get_attribute('innerHTML')

    offer_root = 'body > div.main > div.content > div'
    detail_row = offer_root + ' > div.tabularDetails > div:nth-child({}) > span'

    drug_item = {'Pid': pid, 'Url': href, 'GoodId': item_id}
    drug_item['Title'] = inner_html(offer_root + ' > div.title')
    # The vendor cell is stored as raw markup, not parsed further.
    drug_item['Vendor'] = inner_html(detail_row.format(1)).strip()
    drug_item['Price'] = inner_html(detail_row.format(2))
    drug_item['ShipsTo'] = inner_html(detail_row.format(3)).strip()
    drug_item['ShipsFrom'] = inner_html(detail_row.format(4)).strip()
    drug_item['Escrow'] = inner_html(detail_row.format(5)).strip()
    drug_item['Images'] = inner_html(offer_root + ' > div.offerImage').strip()
    drug_item['Description'] = inner_html('#offerDescription > pre')

    vendor_name = inner_html('div.subtitle > a')
    note_blocks = driver.find_elements_by_css_selector('div.preformattedNotes > pre')
    # The second notes block is the one stored as the vendor's conditions.
    vendor_info = {
        'Name': vendor_name,
        'Conditions': note_blocks[1].get_attribute('innerHTML'),
    }

    model_drug_item.insert_one(drug_item)

    # Vendors are stored once, keyed by name.
    if model_drug_vendor.find_one({'Name': vendor_info['Name']}):
        print("供货商已存在")
    else:
        model_drug_vendor.insert_one(vendor_info)

    return True


if __name__ == '__main__':
    # Script entry point: log in through the proxy, then walk the listing
    # pages from 549 downwards and save every item found on each page.
    driver = driver_new_session(True)
    try:
        driver = login(driver)
        # One fresh ObjectId per run, stored as 'Pid' on every saved item.
        pid = ObjectId()
        # Stop at page 0 instead of counting down forever into negative page
        # numbers. Whether the site's first page is 0 or 1 is not visible
        # here -- TODO confirm; including 0 is harmless either way because
        # save_item skips items that are already stored.
        for page in range(549, -1, -1):
            print(page)
            for item_href in get_list(driver, page):
                try:
                    saved = save_item(driver, pid, item_href)
                except Exception as exc:
                    # Only ordinary errors are handled here, so Ctrl-C still
                    # stops the crawl; show what failed before pausing.
                    print(repr(exc))
                    print('\n出错了,等待人工验证\n')
                    input('验证完成后回车')
                    continue
                # Throttle only after a page was actually fetched and saved.
                if saved:
                    time.sleep(3)
            time.sleep(4)
    finally:
        # Runs on normal completion, on errors and on Ctrl-C.
        driver.close()
